	.file "reg_round.S"
/*---------------------------------------------------------------------------+
 |  reg_round.S                                                              |
 |                                                                           |
 | Rounding/truncation/etc for FPU basic arithmetic functions.               |
 |                                                                           |
 | Copyright (C) 1993                                                        |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail apm233m@vaxc.cc.monash.edu.au    |
 |                                                                           |
 | This code has four possible entry points.                                 |
 | The following must be entered by a jmp instruction:                       |
 |   FPU_round, FPU_round_sqrt, and FPU_Arith_exit.                          |
 |                                                                           |
 | The round_reg entry point is intended to be used by C code.               |
 | From C, call as:                                                          |
 | void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
 |                                                                           |
 | For correct "up" and "down" rounding, the argument must have the correct  |
 | sign.                                                                     |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 | Four entry points.                                                        |
 |                                                                           |
 | Needed by both the FPU_round and FPU_round_sqrt entry points:             |
 |  %eax:%ebx  64 bit significand                                            |
 |  %edx       32 bit extension of the significand                           |
 |  %edi       pointer to an FPU_REG for the result to be stored             |
 |  stack      calling function must have set up a C stack frame and         |
 |             pushed %esi, %edi, and %ebx                                   |
 |                                                                           |
 | Needed just for the FPU_round_sqrt entry point:                           |
 |  %cx  A control word in the same format as the FPU control word.          |
 | Otherwise, PARAM4 must give such a value.                                 |
 |                                                                           |
 |                                                                           |
 | The significand and its extension are assumed to be exact in the          |
 | following sense:                                                          |
 |   If the significand by itself is the exact result then the significand   |
 |   extension (%edx) must contain 0, otherwise the significand extension    |
 |   must be non-zero.                                                       |
 |   If the significand extension is non-zero then the significand is        |
 |   smaller than the magnitude of the correct exact result by an amount     |
 |   greater than zero and less than one ls bit of the significand.          |
 |   The significand extension is only required to have three possible       |
 |   non-zero values:                                                        |
 |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
 |                                 smaller than the magnitude of the true    |
 |                                 exact result.                             |
 |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 |  The code in this module has become quite complex, but it should handle   |
 |  all of the FPU flags which are set at this stage of the basic arithmetic |
 |  computations.                                                            |
 |  There are a few rare cases where the results are not set identically to  |
 |  a real FPU. These require a bit more thought because at this stage the   |
 |  results of the code here appear to be more consistent...                 |
 |  This may be changed in a future version.                                 |
 +---------------------------------------------------------------------------*/


#include "fpu_asm.h"
#include "exception.h"
#include "control_w.h"

/* Flags for FPU_bits_lost */
/* Written by the rounding paths when significand bits are discarded:     */
/*   LOST_DOWN - the result was truncated                                 */
/*   LOST_UP   - the result was incremented                               */
/* A value of 0 means no bits were lost.                                  */
#define	LOST_DOWN	$1
#define	LOST_UP		$2

/* Flags for FPU_denormal */
/*   DENORMAL           - the operand was shifted right on entry and the  */
/*                        shift has to be undone after rounding           */
/*   UNMASKED_UNDERFLOW - exponent too small with the underflow exception */
/*                        unmasked; no shift was done                     */
/* A value of 0 means the operand was not treated as a de-normal.         */
#define	DENORMAL	$1
#define	UNMASKED_UNDERFLOW $2

/* Working state for one pass through the rounding code. */
/* NOTE(review): these are single static bytes, so two overlapping passes */
/* through this module would share them - confirm callers never overlap.  */
.data
	.align 2,0
FPU_bits_lost:
	.byte	0
FPU_denormal:
	.byte	0

/* Code section; padding uses fill value 144 (0x90, the x86 NOP opcode). */
.text
	.align 2,144
/* The four entry points described in the file header. */
.globl FPU_round
.globl FPU_round_sqrt
.globl FPU_Arith_exit
.globl round_reg

/*
 * round_reg -- entry point for C callers:
 *   void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w)
 *
 * Builds the stack frame and register saves which the shared exit code
 * (FPU_Arith_exit) unwinds, loads the registers the way FPU_round_sqrt
 * expects them, and then joins the common rounding code:
 *   %edi       = arg
 *   %eax:%ebx  = 64 bit significand read from *arg
 *   %edx       = extent (extension of the significand)
 *   %ecx       = control_w
 */
round_reg:
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM3,%ecx		/* control word */
	movl	PARAM2,%edx		/* significand extension */
	movl	PARAM1,%edi		/* result/operand register */
	movl	SIGL(%edi),%ebx		/* ls 32 bits of the significand */
	movl	SIGH(%edi),%eax		/* ms 32 bits of the significand */
	jmp	FPU_round_sqrt

/*
 * FPU_round / FPU_round_sqrt -- common start of the rounding code.
 * Both must be entered by a jmp, with:
 *   %eax:%ebx  64 bit significand
 *   %edx       32 bit extension of the significand
 *   %edi       pointer to the FPU_REG which receives the result
 *   stack      C stack frame set up, with %esi, %edi and %ebx pushed
 * FPU_round takes the control word from PARAM4; FPU_round_sqrt expects
 * it to be in %ecx already.
 *
 * An exponent at or below EXP_UNDER is sent to xMake_denorm, which comes
 * back to xDenorm_done.  The control word is then kept in %esi and the
 * precision control field selects the 64, 53 or 24 bit rounding code.
 */
FPU_round:		/* Normal entry point */
	movl	PARAM4,%ecx

FPU_round_sqrt:		/* Entry point from wm_sqrt.S */

#ifdef PARANOID
/* Cannot use this here yet */
/*	orl	%eax,%eax */
/*	jns	L_entry_bugged */
#endif /* PARANOID */

	cmpl	EXP_UNDER,EXP(%edi)
	jle	xMake_denorm			/* The number is a de-normal */

	movb	$0,FPU_denormal			/* 0 -> not a de-normal */

xDenorm_done:
	movb	$0,FPU_bits_lost		/* No bits yet lost in rounding */

	movl	%ecx,%esi			/* keep the whole control word */
	andl	CW_PC,%ecx			/* isolate precision control */
	cmpl	PR_64_BITS,%ecx
	je	LRound_To_64

	cmpl	PR_53_BITS,%ecx
	je	LRound_To_53

	cmpl	PR_24_BITS,%ecx
	je	LRound_To_24

	/* Any other precision value: without PARANOID this simply falls */
	/* through into the 24 bit code which follows.                   */
#ifdef PARANOID
	jmp	L_bugged	/* There is no bug, just a bad control word */
#endif /* PARANOID */


/*
 * Round etc to 24 bit precision.
 *
 * In:  %eax:%ebx significand, %edx extension, %esi control word,
 *      %edi -> FPU_REG (its sign decides the "up"/"down" modes).
 * The bits below the 24th are the low 8 bits of %eax, all of %ebx and
 * the extension %edx.
 * Out: %eax:%ebx rounded (%ebx = 0 whenever bits were discarded),
 *      FPU_bits_lost = LOST_UP / LOST_DOWN if the result is inexact.
 * Leaves via LCheck_Round_Overflow (after an increment) or LRe_normalise.
 */
LRound_To_24:
	movl	%esi,%ecx
	andl	CW_RC,%ecx		/* isolate rounding control */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_24

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_24

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_24

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_24

	/* Without PARANOID an unmatched mode falls through to LUp_24 */
#ifdef PARANOID
	jmp	L_bugged
#endif /* PARANOID */

LUp_24:
	cmpb	SIGN_POS,SIGN(%edi)
	jne	LCheck_truncate_24	/* If negative then  up==truncate */

	jmp	LCheck_24_round_up

LDown_24:
	cmpb	SIGN_POS,SIGN(%edi)
	je	LCheck_truncate_24	/* If positive then  down==truncate */

	/* Directed rounding away from zero: increment if anything at all */
	/* lies below the 24th bit.                                       */
LCheck_24_round_up:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jnz	LDo_24_round_up
	jmp	LRe_normalise		/* exact, nothing to do */

LRound_nearest_24:
	/* Do rounding of the 24th bit if needed (nearest or even) */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	cmpl	$0x00000080,%ecx
	jc	LCheck_truncate_24	/* less than half, no increment needed */

	jne	LGreater_Half_24	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%ebx,%ebx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	orl	%edx,%edx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	/* Exactly half, increment only if 24th bit is 1 (round to even) */
	testl	$0x00000100,%eax
	jz	LDo_truncate_24

LGreater_Half_24:			/* Rounding: increment at the 24th bit */
LDo_24_round_up:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_UP,FPU_bits_lost
	addl	$0x00000100,%eax	/* carry out is tested at the target */
	jmp	LCheck_Round_Overflow

LCheck_truncate_24:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jz	LRe_normalise			/* No truncation needed */

LDo_truncate_24:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_DOWN,FPU_bits_lost
	jmp	LRe_normalise


/*
 * Round etc to 53 bit precision.
 *
 * In:  %eax:%ebx significand, %edx extension, %esi control word,
 *      %edi -> FPU_REG (its sign decides the "up"/"down" modes).
 * The bits below the 53rd are the low 11 bits of %ebx and the
 * extension %edx.
 * Out: %eax:%ebx rounded, FPU_bits_lost = LOST_UP / LOST_DOWN if the
 *      result is inexact.
 * Leaves via LCheck_Round_Overflow (after an increment) or LRe_normalise.
 */
LRound_To_53:
	movl	%esi,%ecx
	andl	CW_RC,%ecx		/* isolate rounding control */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_53

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_53

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_53

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_53

	/* Without PARANOID an unmatched mode falls through to LUp_53 */
#ifdef PARANOID
	jmp	L_bugged
#endif /* PARANOID */

LUp_53:
	cmpb	SIGN_POS,SIGN(%edi)
	jne	LCheck_truncate_53	/* If negative then  up==truncate */

	jmp	LCheck_53_round_up

LDown_53:
	cmpb	SIGN_POS,SIGN(%edi)
	je	LCheck_truncate_53	/* If positive then  down==truncate */

	/* Directed rounding away from zero: increment if anything at all */
	/* lies below the 53rd bit.                                       */
LCheck_53_round_up:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jnz	LDo_53_round_up
	jmp	LRe_normalise		/* exact, nothing to do */

LRound_nearest_53:
	/* Do rounding of the 53rd bit if needed (nearest or even) */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	cmpl	$0x00000400,%ecx
	jc	LCheck_truncate_53	/* less than half, no increment needed */

	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%edx,%edx
	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
	testl	$0x00000800,%ebx
	jz	LTruncate_53

LGreater_Half_53:			/* Rounding: increment at the 53rd bit */
LDo_53_round_up:
	movb	LOST_UP,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	addl	$0x00000800,%ebx
	adcl	$0,%eax			/* carry out is tested at the target */
	jmp	LCheck_Round_Overflow

LCheck_truncate_53:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jz	LRe_normalise		/* No truncation needed */

LTruncate_53:
	movb	LOST_DOWN,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	jmp	LRe_normalise


/*
 * Round etc to 64 bit precision.
 *
 * In:  %eax:%ebx significand, %edx extension, %esi control word,
 *      %edi -> FPU_REG (its sign decides the "up"/"down" modes).
 * Only the extension %edx lies below the 64th bit; 0x80000000 in %edx
 * means exactly half an ls bit.
 * Out: %eax:%ebx rounded, FPU_bits_lost = LOST_UP / LOST_DOWN if the
 *      result is inexact.
 *
 * LCheck_Round_Overflow is shared with the 24 and 53 bit code: it must
 * be reached with the carry flag still holding the carry out of the
 * significand increment.
 * LTruncate_64 falls through into LRe_normalise, which must follow
 * this code directly.
 */
LRound_To_64:
	movl	%esi,%ecx
	andl	CW_RC,%ecx		/* isolate rounding control */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_64

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_64

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_64

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_64

	/* Without PARANOID an unmatched mode falls through to LUp_64 */
#ifdef PARANOID
	jmp	L_bugged
#endif /* PARANOID */

LUp_64:
	cmpb	SIGN_POS,SIGN(%edi)
	jne	LCheck_truncate_64	/* If negative then  up==truncate */

	orl	%edx,%edx
	jnz	LDo_64_round_up		/* inexact: round away from zero */
	jmp	LRe_normalise

LDown_64:
	cmpb	SIGN_POS,SIGN(%edi)
	je	LCheck_truncate_64	/* If positive then  down==truncate */

	orl	%edx,%edx
	jnz	LDo_64_round_up		/* inexact: round away from zero */
	jmp	LRe_normalise

LRound_nearest_64:
	cmpl	$0x80000000,%edx
	jc	LCheck_truncate_64	/* less than half, no increment needed */

	jne	LDo_64_round_up		/* greater than half, increment needed */

	/* Now test for round-to-even */
	/* The ls bit of the significand is bit 0 of %ebx.  A byte sized  */
	/* test needs the byte register %bl; the 32 bit name %ebx does    */
	/* not match the "b" suffix.                                      */
	testb	$1,%bl
	jz	LCheck_truncate_64

LDo_64_round_up:
	movb	LOST_UP,FPU_bits_lost
	addl	$1,%ebx
	adcl	$0,%eax

LCheck_Round_Overflow:
	jnc	LRe_normalise

	/* Overflow, adjust the result (significand to 1.0) */
	rcrl	$1,%eax			/* the carry becomes the new ms bit */
	rcrl	$1,%ebx
	incl	EXP(%edi)
	jmp	LRe_normalise

LCheck_truncate_64:
	orl	%edx,%edx
	jz	LRe_normalise		/* exact, nothing was lost */

LTruncate_64:
	movb	LOST_DOWN,FPU_bits_lost
/*
 * Common tail of all the rounding paths.
 *
 * LRe_normalise          rounding is finished; if the operand was flagged
 *                        in FPU_denormal on entry, go and deal with that.
 * xL_Normalised          report any precision loss recorded in
 *                        FPU_bits_lost.
 * xL_no_precision_loss   tag the result as valid.
 * xL_Store_significand   write %eax:%ebx into the FPU_REG at %edi, clear
 *                        %eax ("no errors") and check for overflow.
 * FPU_Arith_exit         restore the registers saved by the caller's
 *                        prologue, drop the stack frame and return.
 */
LRe_normalise:
	testb	$0xff,FPU_denormal
	jne	xNormalise_result	/* non-zero: de-normal handling needed */

xL_Normalised:
	cmpb	LOST_UP,FPU_bits_lost
	jz	xL_precision_lost_up

	cmpb	LOST_DOWN,FPU_bits_lost
	jz	xL_precision_lost_down

xL_no_precision_loss:
	movb	TW_Valid,TAG(%edi)

xL_Store_significand:
	/* store the result */
	movl	%ebx,SIGL(%edi)
	movl	%eax,SIGH(%edi)

	xorl	%eax,%eax		/* No errors detected. */

	cmpl	EXP_OVER,EXP(%edi)
	jge	L_overflow

FPU_Arith_exit:
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	%ebp,%esp		/* these two are what "leave" does */
	popl	%ebp
	ret


/*
 * Precision-loss reporting, reached from xL_Normalised.
 * Each handler calls the matching C helper with %eax kept safe on the
 * stack, then carries on at xL_no_precision_loss.
 */

/* Precision was lost by truncation. */
xL_precision_lost_down:
	pushl	%eax
	call	set_precision_flag_down
	popl	%eax
	jmp	xL_no_precision_loss

/* Precision was lost by rounding up. */
xL_precision_lost_up:
	pushl	%eax
	call	set_precision_flag_up
	popl	%eax
	jmp	xL_no_precision_loss


// The number is a denormal (which might get rounded up to a normal)
// Shift the number right the required number of bits, which will
// have to be undone later...
//
// Entered from FPU_round/FPU_round_sqrt when EXP(%edi) <= EXP_UNDER.
// In:   %eax:%ebx significand, %edx extension, %ecx control word,
//       %edi -> FPU_REG.
// Out:  significand and extension shifted right, with every bit that
//       was shifted out folded into the extension as a "sticky" bit;
//       EXP(%edi) raised by the shift count; FPU_denormal set;
//       %ecx restored to the control word.  Always leaves via
//       xDenorm_done.
xMake_denorm:
	// The action to be taken depends upon whether the underflow
	// exception is masked
	testb	CW_Underflow,%cl		// Underflow mask.
	jz	xUnmasked_underflow		// Do not make a denormal.

	movb	DENORMAL,FPU_denormal

	pushl	%ecx		// Save the control word, xDenorm_done needs it
	movl	EXP_UNDER+1,%ecx
	subl	EXP(%edi),%ecx	// %ecx = shift count = (EXP_UNDER+1) - exponent

	cmpl	$64,%ecx	/* 64 or more: nothing survives in %eax:%ebx */
	jnc	xDenorm_shift_more_than_63

	cmpl	$32,%ecx	/* shrd only works for 0..31 bits */
	jnc	xDenorm_shift_more_than_32

// We got here without jumps by assuming that the most common requirement
//   is for a small de-normalising shift.
// Shift by [1..31] bits
	addl	%ecx,EXP(%edi)	// exponent becomes EXP_UNDER+1
	orl	%edx,%edx	// extension
	setne	%ch		// Save whether %edx is non-zero
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx	// bits leaving %ebx become the new extension
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orb	%ch,%dl		// old extension survives as a sticky bit
	popl	%ecx
	jmp	xDenorm_done

// Shift by [32..63] bits
// (despite the label name, a count of exactly 32 is handled here too)
xDenorm_shift_more_than_32:
	addl	%ecx,EXP(%edi)	// exponent becomes EXP_UNDER+1
	subb	$32,%cl		// the word moves below supply the other 32
	orl	%edx,%edx
	setne	%ch		// %ch = 1 if the old extension was non-zero
	orb	%ch,%bl		// ...keep that as a sticky bit in %ebx
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx	// %edx collects the bits shifted out of %ebx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orl	%edx,%edx		// test these 32 bits
	setne	%cl		// %cl = 1 if any shifted-out bit was set
	orb	%ch,%bl
	orb	%cl,%bl
	movl	%ebx,%edx	// move everything down one word:
	movl	%eax,%ebx	//   %ebx -> extension, %eax -> ls word,
	xorl	%eax,%eax	//   ms word becomes zero
	popl	%ecx
	jmp	xDenorm_done

// Shift by [64..) bits
xDenorm_shift_more_than_63:
	cmpl	$64,%ecx
	jne	xDenorm_shift_more_than_64

// Exactly 64 bit shift
	addl	%ecx,EXP(%edi)	// exponent becomes EXP_UNDER+1
	xorl	%ecx,%ecx
	orl	%edx,%edx
	setne	%cl		// old extension non-zero?
	orl	%ebx,%ebx
	setne	%ch		// old ls word non-zero?
	orb	%ch,%cl
	orb	%cl,%al		// either one becomes a sticky bit in %eax
	movl	%eax,%edx	// old ms word is now the extension
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	xDenorm_done

// Shift by more than 64 bits: the whole significand ends up below the
// extension, so only a sticky bit is left.
xDenorm_shift_more_than_64:
	movl	EXP_UNDER+1,EXP(%edi)
// This is easy, %eax must be non-zero, so..
	movl	$1,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	xDenorm_done


// Underflow exception is unmasked: leave the number unshifted and just
// record the fact; xNormalise_result picks it up after rounding.
xUnmasked_underflow:
	movb	UNMASKED_UNDERFLOW,FPU_denormal
	jmp	xDenorm_done


/*
 * Undo the de-normalisation.
 *
 * Reached from LRe_normalise when FPU_denormal is non-zero.
 * In:  %eax:%ebx rounded significand, %edi -> FPU_REG.
 * An unmasked underflow is handed to xSignal_underflow.  Otherwise the
 * significand is shifted left until its ms bit is set, with EXP(%edi)
 * reduced by the shift count, and a masked underflow is raised if the
 * rounding lost any bits.  A significand of zero goes to
 * L_underflow_to_zero.  Normal continuation is at xL_Normalised.
 */
xNormalise_result:
	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
	je	xSignal_underflow

/* The number must be a denormal if we got here. */
#ifdef PARANOID
	/* But check it... just in case. */
	cmpl	EXP_UNDER+1,EXP(%edi)
	jne	L_norm_bugged
#endif /* PARANOID */

#ifdef PECULIAR_486
	/* This implements a special feature of 80486 behaviour.      */
	/* Underflow will be signalled even if the number is          */
	/* not a denormal after rounding.                             */
	/* This difference occurs only for masked underflow, and not  */
	/* in the unmasked case.                                      */
	/* Actual 80486 behaviour differs from this in some circumstances. */
	orl	%eax,%eax		/* ms bits */
	js	LNormalise_shift_done	/* Will be masked underflow */
#endif /* PECULIAR_486 */

	orl	%eax,%eax		/* ms bits */
	js	xL_Normalised		/* No longer a denormal */

	jnz	LNormalise_shift_up_to_31	/* Shift left 0 - 31 bits */

	orl	%ebx,%ebx
	jz	L_underflow_to_zero	/* The contents are zero */

/* Shift left 32 - 63 bits */
	movl	%ebx,%eax		/* a whole word first ... */
	xorl	%ebx,%ebx
	subl	$32,EXP(%edi)

LNormalise_shift_up_to_31:
	bsrl	%eax,%ecx	/* get the required shift in %ecx */
	subl	$31,%ecx
	negl	%ecx		/* %ecx = 31 - (index of the ms set bit) */
	shld	%cl,%ebx,%eax
	shl	%cl,%ebx
	subl	%ecx,EXP(%edi)

LNormalise_shift_done:
	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
	jz	xL_Normalised

	/* There must be a masked underflow */
	/* NOTE(review): this calls "exception" directly, while other call */
	/* sites in this file go through the EXCEPTION macro - confirm     */
	/* that both name the same routine.                                */
	push	%eax			/* keep the ms word of the result */
	pushl	EX_Underflow
	call	exception
	popl	%eax			/* discard the argument */
	popl	%eax			/* recover the ms word */
	jmp	xL_Normalised


/*
 * Masked underflow where nothing is left of the significand.
 * Reached from xNormalise_result with %eax and %ebx both zero.
 * Flags the precision loss (truncation), raises EX_Underflow, then
 * stores a zero: exponent EXP_UNDER, tag TW_Zero, and the (zero)
 * significand via xL_Store_significand.
 */
L_underflow_to_zero:
	pushl	%eax
	call	set_precision_flag_down
	popl	%eax

	pushl	%eax			/* preserved across the call */
	pushl	EX_Underflow		/* argument for exception() */
	call	exception
	popl	%eax			/* drop the argument */
	popl	%eax			/* get %eax back */

/* Reduce the exponent to EXP_UNDER */
	movl	EXP_UNDER,EXP(%edi)
	movb	TW_Zero,TAG(%edi)
	jmp	xL_Store_significand


/*
 * The result is too large to represent.
 * Reached from xL_Store_significand when EXP(%edi) >= EXP_OVER.
 * Passes the FPU_REG pointer to arith_overflow() and returns through
 * the common exit; %eax is left as arith_overflow() set it.
 */
L_overflow:
	pushl	%edi			/* argument: the result register */
	call	arith_overflow
	popl	%edi
	jmp	FPU_Arith_exit


/*
 * Unmasked underflow, reached from xNormalise_result.
 * The rounding may have turned the number into a non-denormal; if the
 * exponent is now above EXP_UNDER nothing needs to be signalled.
 * Otherwise the exponent is biased upwards by the magic number and
 * EX_Underflow is raised.  Both cases continue at xL_Normalised.
 */
xSignal_underflow:
	cmpl	EXP_UNDER,EXP(%edi)
	jg	xL_Normalised		/* no longer an underflow */

xDo_unmasked_underflow:
	/* Increase the exponent by the magic number */
	addl	$(3*(1<<13)),EXP(%edi)
	pushl	%eax			/* preserved across the call */
	pushl	EX_Underflow		/* argument */
	call	EXCEPTION
	popl	%eax			/* drop the argument */
	popl	%eax			/* get %eax back */
	jmp	xL_Normalised


#ifdef PARANOID
/*
 * Internal consistency failures (only built with PARANOID).
 * Each handler raises EX_INTERNAL combined with its own code and then
 * returns 1 in %eax through the common exit.  The argument is popped
 * into %ebx, which FPU_Arith_exit reloads from the stack anyway.
 */
/* If we ever get here then we have problems! */
L_bugged:			/* bad precision or rounding control */
	pushl	EX_INTERNAL|0x201
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_norm_bugged:			/* exponent is not EXP_UNDER+1 after rounding */
	pushl	EX_INTERNAL|0x216
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_entry_bugged:			/* only used by the disabled entry check */
	pushl	EX_INTERNAL|0x217
	call	EXCEPTION
	popl	%ebx
L_exception_exit:
	movl	$1,%eax
	jmp	FPU_Arith_exit
#endif /* PARANOID */
